* Session setup: no paging of output, empty memory, and no log left open
* from an earlier run (capture ignores the error when none is open).
set more off
clear

capture log close

**** Data ******

*** Children's names ***
*
* For every census year and each sex, children with relate==3, age 0-15 and
* noname==0 are collapsed to perwt-weighted means of father and household
* characteristics. Two lookup tables are stacked across years in tempdir:
*   childnames_income_sdx.dta            cells: year1 x sex x firstsoundex
*   childnames_byregion_income_sdx.dta   cells: year1 x sex x firstsoundex x region_birth_coarse
* Each new year/sex slice is placed on top of the rows already saved, which
* is the row order the original copy-pasted sections produced.
*
* Requires the globals tempdir and incvars, which are set outside this file.

* Two-digit year suffix used inside the census-specific variable names.
local s1850 "50"
local s1860 "60"
local s1870 "70"
local s1880 "80"
local s1900 "00"
local s1910 "10"
local s1920 "20"

foreach y in 1850 1860 1870 1880 1900 1910 1920 {

	local yy `s`y''

	foreach s in 1 2 {

		*** By name ***

		use $tempdir/`y'_fathers.dta, clear
		keep if sex==`s' & relate==3 & age>=0 & age<=15 & noname==0
		gen farmer = (occ_`yy'_father == 100)
		egen name_count = count(sex), by(firstsoundex)
		collapse (mean) year name_count ///
			logocc1900_father = logoccwage1900_*_father ///
			logoccscore_father = logoccscore_*_father ///
			logoccPHall_father = logoccwage_PHall_*_father ///
			logoccPHnof_father = logoccwage_PHnofarm_*_father ///
			logocclido_father = logocclido_*_father ///
			logoccPHimpf_father = logoccwage_PHimpfarm_*_father ///
			occpctile_father = occpctile_owndist*father ///
			occpctile_50dist_father = occpctile_50dist*father ///
			occpctile_60dist_father = occpctile_60dist*father ///
			byname_farmer = farmer byname_age = age ///
			byname_nsibs = num_sib byname_nbro = num_bro byname_nsis = num_sis ///
			byname_agerank = age_rank_child_* ///
			byname_agerankownsex = age_rank_ownsex_child_* ///
			byname_urban = urban ///
			byname_dadage = age_*_father byname_momage = age_*_mother ///
			byname_dadlit = lit_*_father byname_momlit = lit_*_mother ///
			byname_dadageatbirth = ageatbirth_*_father ///
			byname_momageatbirth = ageatbirth_*_mother ///
			byname_dadfor = foreign_*_father byname_momfor = foreign_*_mother ///
			byname_foreign = foreign_`yy' ///
			[aw=perwt], by(firstsoundex)
		sort firstsoundex
		gen sex = `s'
		gen year1 = `y'

		* Quartiles of the name-level father income measures across names,
		* unweighted and weighted by the number of children per name.
		foreach var in $incvars {
			xtile qtl_`var'_unwgt_full = logocc`var'_father, nq(4)
			xtile qtl_`var'_wgt_full = logocc`var'_father [fw=name_count], nq(4)
		}

		* The very first slice starts the file; later slices stack onto it.
		if `y' != 1850 | `s' != 1 {
			append using $tempdir/childnames_income_sdx.dta
		}
		save $tempdir/childnames_income_sdx.dta, replace

		*** By name and region of birth ***

		use $tempdir/`y'_fathers.dta, clear

		* Coarse region is the tens digit of region_birth; bpl codes of 100
		* and above are put in category 5.
		gen region_birth_coarse = floor(region_birth/10)
		replace region_birth_coarse = 5 if bpl>=100 & bpl!=.
		tab region_birth_coarse

		keep if sex==`s' & relate==3 & age>=0 & age<=15 & noname==0
		gen farmer = (occ_`yy'_father == 100)
		* NOTE(review): this count is taken by firstsoundex only, not by
		* firstsoundex and region, so it equals the by-name count. Kept as in
		* the original to leave the weighted quartiles unchanged - confirm
		* whether a count per name-by-region cell was intended.
		egen name_count_byreg = count(sex), by(firstsoundex)
		* The count variable is written out in full here and in the weights
		* below. The original typed name_count, which only resolved to
		* name_count_byreg through variable-name abbreviation.
		collapse (mean) year name_count_byreg ///
			logocc1900_father_reg = logoccwage1900_*_father ///
			logoccscore_father_reg = logoccscore_*_father ///
			logoccPHall_father_reg = logoccwage_PHall_*_father ///
			logoccPHnof_father_reg = logoccwage_PHnofarm_*_father ///
			logocclido_father_reg = logocclido_*_father ///
			logoccPHimpf_father_reg = logoccwage_PHimpfarm_*_father ///
			occpctile_father_reg = occpctile_owndist*father ///
			occpctile_50dist_father_reg = occpctile_50dist*father ///
			occpctile_60dist_father_reg = occpctile_60dist*father ///
			bynamereg_farmer = farmer bynamereg_age = age ///
			bynamereg_nsibs = num_sib bynamereg_nbro = num_bro bynamereg_nsis = num_sis ///
			bynamereg_agerank = age_rank_child_* ///
			bynamereg_agerankownsex = age_rank_ownsex_child_* ///
			bynamereg_urban = urban ///
			bynamereg_dadage = age_*_father bynamereg_momage = age_*_mother ///
			bynamereg_dadlit = lit_*_father bynamereg_momlit = lit_*_mother ///
			bynamereg_dadageatbirth = ageatbirth_*_father ///
			bynamereg_momageatbirth = ageatbirth_*_mother ///
			bynamereg_dadfor = foreign_*_father bynamereg_momfor = foreign_*_mother ///
			bynamereg_foreign = foreign_`yy' ///
			[aw=perwt], by(firstsoundex region_birth_coarse)
		sort firstsoundex
		gen sex = `s'
		gen year1 = `y'

		foreach var in $incvars {
			xtile qtl_reg_`var'_unwgt_full = logocc`var'_father_reg, nq(4)
			xtile qtl_reg_`var'_wgt_full = logocc`var'_father_reg [fw=name_count_byreg], nq(4)
		}

		if `y' != 1850 | `s' != 1 {
			append using $tempdir/childnames_byregion_income_sdx.dta
		}
		save $tempdir/childnames_byregion_income_sdx.dta, replace
	}
}

	
***** 30-year*****
*
* Adult panel. For each census year y the 1% sample is loaded, names are
* cleaned and soundex-coded, spouse attributes are copied between the
* household head and wife rows, and adults aged 30-45 are linked on
* year1 = y - 30, sex and firstsoundex (and region_birth_coarse) to the
* children's-name tables. The same link is then repeated with the spouse's
* sex and name. Years are stacked in panel_trends_30yr_income_sdx.dta, each
* new year placed on top of the rows already saved.
*
* 1880 used to be a separate verbatim copy of the loop body; it is now the
* first pass of the loop.
*
* Requires the globals rawdir, tempdir, incvars and yearlistshort, which are
* set outside this file.

* bpl codes belonging to each two-digit region_birth code.
local div11 9, 23, 25, 33, 44, 50
local div12 36, 34, 42
local div21 17, 18, 26, 39, 55
local div22 19, 20, 27, 29, 31, 38, 46
local div31 10, 11, 24, 12, 13, 37, 45, 51, 54
local div32 1, 21, 28, 47
local div33 5, 22, 40, 48
local div41 4, 8, 16, 32, 30, 35, 56, 49
local div42 6, 41, 53
local divisions 11 12 21 22 31 32 33 41 42

* Plain abbreviation-to-name pairs. The two abbreviations that contain an
* apostrophe are handled with explicit replace statements inside the loop.
local abbrevlist Wm William Geo George Chas Charles Danl Daniel Jas James Jos Joseph Robt Robert Richd Richard Saml Samuel Thos Thomas Fredk Frederick Jno John Tho Thomas Michl Michael

local firstpass 1
foreach y in 1880 $yearlistshort {

	use $rawdir/`y'_1%.dta, clear

	* Random tie-breaker for the rank-based quartiles below; the seed is
	* reset for every year, as in the original.
	set seed 828
	gen randomorder = runiform()

	gen region_birth = .
	foreach d of local divisions {
		replace region_birth = `d' if inlist(bpl, `div`d'')
	}

	* Coarse region is the tens digit; bpl codes of 100 and above get 5.
	gen region_birth_coarse = floor(region_birth/10)
	replace region_birth_coarse = 5 if bpl>=100 & bpl!=.

	*** NAMES
	*** first name
	gen str first = word(namefrst,1)
	replace first = proper(trim(subinstr(first, ".", "", .)))
	*** middle name
	gen str middle = word(namefrst,2)
	replace middle = proper(middle)

	gen init = "X" if middle!=""

	*** obvious abbreviations (not nicknames)
	local pairs `abbrevlist'
	while "`pairs'" != "" {
		gettoken short pairs : pairs
		gettoken full pairs : pairs
		replace first = "`full'" if first == "`short'"
	}
	replace first = "Frederick" if first == "Fred'K"
	replace first = "Samuel" if first == "Sam'L"

	gen firstsoundex = soundex(first)

	drop if age < 20

	* Own occscore quartile within sex, ranked with random tie-breaking.
	gen missing = (occscore == .)
	sort sex missing occscore randomorder
	by sex missing: gen qtl_ownoccscore = ceil((_n / _N) / 0.25)
	replace qtl_ownoccscore = . if missing == 1
	drop missing

	gen year1 = `y'-30
	gen year2 = `y'

	gen husb = sex==1 & relate==1 & sploc==2
	gen wife = sex==2 & relate==2 & sploc==1

	tab husb if sex==1
	tab wife if sex==2

	* After this sort the wife row is expected directly below the husband
	* row of the same serial, so values are copied from the adjacent row.
	sort serial relate
	foreach v in bpl race age {
		gen husb`v' = `v' if husb==1
		replace husb`v' = husb`v'[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1

		gen wife`v' = `v' if wife==1
		replace wife`v' = wife`v'[_n+1] if serial==serial[_n+1] & husb==1 & wife[_n+1]==1
	}

	gen spouseage = husbage if wife==1
	replace spouseage = wifeage if husb==1
	gen agegap = husbage - wifeage

	gen spousebpl = husbbpl if wife==1
	replace spousebpl = wifebpl if husb==1

	gen spouserace = husbrace if wife==1
	replace spouserace = wiferace if husb==1

	gen husboccscore = occscore if husb==1
	replace husboccscore = husboccscore[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1
	gen husboccscoreqtl = qtl_ownoccscore if husb==1
	replace husboccscoreqtl = husboccscoreqtl[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1

	gen spouseregion_birth = .
	foreach d of local divisions {
		replace spouseregion_birth = `d' if inlist(spousebpl, `div`d'')
	}

	gen spouseregion_birth_coarse = floor(spouseregion_birth/10)
	replace spouseregion_birth_coarse = 5 if spousebpl>=100 & spousebpl!=.

	drop husb wife

	keep if age>=30 & age<=45

	* Link own name to the childhood-name tables; unmatched rows that exist
	* only in the lookup table are not kept.
	merge m:1 year1 sex firstsoundex using $tempdir/childnames_income_sdx.dta, keepusing(name_count* logocc* qtl* byname_*) keep(master match)
	gen linked = _merge==3
	drop _merge

	merge m:1 year1 sex firstsoundex region_birth_coarse using $tempdir/childnames_byregion_income_sdx.dta, keepusing(name_count* logocc* qtl* bynamereg_*) keep(master match)
	gen linked_byreg = _merge==3
	drop _merge

	* Quartile of the linked father income measure within the adult sample.
	foreach var in $incvars {
		gen missing = (logocc`var'_father == .)
		sort sex missing logocc`var'_father randomorder
		by sex missing: gen qtl_`var'_adultdist = ceil((_n / _N) / 0.25)
		replace qtl_`var'_adultdist = . if missing == 1
		drop missing

		gen missing = (logocc`var'_father_reg == .)
		sort sex missing logocc`var'_father_reg randomorder
		by sex missing: gen qtl_reg_`var'_adultdist = ceil((_n / _N) / 0.25)
		replace qtl_reg_`var'_adultdist = . if missing == 1
		drop missing
	}

	gen married = marst==1 | marst==2
	gen ever_married = marst!=6

	**** Assign husband's and wife's father's occwealth ***

	gen husb = sex==1 & relate==1 & sploc==2
	gen wife = sex==2 & relate==2 & sploc==1

	tab husb if sex==1
	tab wife if sex==2

	sort serial relate
	gen husbfirstsoundex = firstsoundex if husb==1
	replace husbfirstsoundex = husbfirstsoundex[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1

	gen wifefirstsoundex = firstsoundex if wife==1
	replace wifefirstsoundex = wifefirstsoundex[_n+1] if serial==serial[_n+1] & husb==1 & wife[_n+1]==1

	gen spousefirstsoundex = husbfirstsoundex if wife==1
	replace spousefirstsoundex = wifefirstsoundex if husb==1
	gen spousesex = 1 if wife==1
	replace spousesex = 2 if husb==1

	* Put the spouse values into the merge-key names and park the own ones.
	rename sex ownsex
	rename firstsoundex ownfirstsoundex
	rename spousefirstsoundex firstsoundex
	rename spousesex sex
	* Fix: the by-region spouse link used to be keyed on the respondent's own
	* region_birth_coarse while spouseregion_birth_coarse was never used. The
	* spouse region is now the key for the spouse merges.
	rename region_birth_coarse ownregion_birth_coarse
	rename spouseregion_birth_coarse region_birth_coarse

	foreach var in name_count* logocc* qtl* byname* {
		rename `var' own`var'
	}

	merge m:1 year1 sex firstsoundex using $tempdir/childnames_income_sdx.dta, keepusing(name_count* logocc* qtl* byname_*) keep(master match)
	gen spouselinked = _merge==3
	drop _merge

	merge m:1 year1 sex firstsoundex region_birth_coarse using $tempdir/childnames_byregion_income_sdx.dta, keepusing(name_count* logocc* qtl* bynamereg_*) keep(master match)
	gen spouselinked_byreg = _merge==3
	drop _merge

	rename region_birth_coarse spouseregion_birth_coarse
	rename ownregion_birth_coarse region_birth_coarse

	* Here sex holds the spouse sex, so quartiles are within spouse sex.
	foreach var in $incvars {
		gen missing = (logocc`var'_father == .)
		sort sex missing logocc`var'_father randomorder
		by sex missing: gen qtl_`var'_adultdist = ceil((_n / _N) / 0.25)
		replace qtl_`var'_adultdist = . if missing == 1
		drop missing

		gen missing = (logocc`var'_father_reg == .)
		sort sex missing logocc`var'_father_reg randomorder
		by sex missing: gen qtl_reg_`var'_adultdist = ceil((_n / _N) / 0.25)
		replace qtl_reg_`var'_adultdist = . if missing == 1
		drop missing
	}

	rename firstsoundex spousefirstsoundex
	rename sex spousesex
	rename ownsex sex
	rename ownfirstsoundex firstsoundex

	* Spouse-linked variables get the _spouse suffix; the parked own
	* variables get their original names back.
	foreach var in name_count* logocc* qtl* byname* {
		rename `var' `var'_spouse
		rename own`var' `var'
	}

	* Percentile ranks of own and spouse father income, computed within the
	* respondent's sex.
	foreach var in $incvars {
		gen rankocc`var'_father = .
		xtile x = logocc`var'_father if sex==1, nq(100)
		xtile y = logocc`var'_father if sex==2, nq(100)
		replace rankocc`var'_father = x if sex==1
		replace rankocc`var'_father = y if sex==2

		gen rankocc`var'_father_spouse = .
		xtile w = logocc`var'_father_spouse if sex==1, nq(100)
		xtile z = logocc`var'_father_spouse if sex==2, nq(100)
		replace rankocc`var'_father_spouse = w if sex==1
		replace rankocc`var'_father_spouse = z if sex==2

		drop w x y z
	}

	* The first year starts the panel file; later years stack onto it.
	if !`firstpass' {
		append using $tempdir/panel_trends_30yr_income_sdx.dta
	}
	save $tempdir/panel_trends_30yr_income_sdx.dta, replace
	local firstpass 0

}

* 1920 adults. Names, regions and spouse attributes are prepared and the
* rows are stacked onto the 30-year panel file. No link to the
* childhood-name tables is made in this section (year1 is 1920 - 30).
use $rawdir/1920_1%.dta, clear

* Random tie-breaker for the rank-based quartile below.
set seed 828
generate randomorder = runiform()

* Two-digit birth region from the bpl code.
generate region_birth = .
replace region_birth = 11 if inlist(bpl, 9, 23, 25, 33, 44, 50)
replace region_birth = 12 if inlist(bpl, 36, 34, 42)
replace region_birth = 21 if inlist(bpl, 17, 18, 26, 39, 55)
replace region_birth = 22 if inlist(bpl, 19, 20, 27, 29, 31, 38, 46)
replace region_birth = 31 if inlist(bpl, 10, 11, 24, 12, 13, 37, 45, 51, 54)
replace region_birth = 32 if inlist(bpl, 1, 21, 28, 47)
replace region_birth = 33 if inlist(bpl, 5, 22, 40, 48)
replace region_birth = 41 if inlist(bpl, 4, 8, 16, 32, 30, 35, 56, 49)
replace region_birth = 42 if inlist(bpl, 6, 41, 53)

* Coarse region is the tens digit; bpl codes of 100 and above get 5.
generate region_birth_coarse = floor(region_birth/10)
replace region_birth_coarse = 5 if bpl>=100 & bpl!=.

*** NAMES
*** first name: first word, periods removed, trimmed, proper case
generate str first = word(namefrst,1)
replace first = proper(trim(subinstr(first, ".", "", .)))
*** middle name
generate str middle = word(namefrst,2)
replace middle = proper(middle)

generate init = "X" if middle!=""

*** obvious abbreviations (not nicknames)
local pairs Wm William Geo George Chas Charles Danl Daniel Jas James Jos Joseph Robt Robert Richd Richard Saml Samuel Thos Thomas Fredk Frederick Jno John Tho Thomas Michl Michael
while "`pairs'" != "" {
	gettoken short pairs : pairs
	gettoken full pairs : pairs
	replace first = "`full'" if first == "`short'"
}
* Abbreviations containing an apostrophe are kept out of the token list.
replace first = "Frederick" if first == "Fred'K"
replace first = "Samuel" if first == "Sam'L"

generate firstsoundex = soundex(first)

drop if age < 20

* Own occscore quartile within sex, ranked with random tie-breaking.
generate missing = (occscore == .)
sort sex missing occscore randomorder
by sex missing: generate qtl_ownoccscore = ceil((_n / _N) / 0.25)
replace qtl_ownoccscore = . if missing == 1
drop missing

generate year1 = 1920-30
generate year2 = 1920

generate husb = sex==1 & relate==1 & sploc==2
generate wife = sex==2 & relate==2 & sploc==1

tabulate husb if sex==1
tabulate wife if sex==2

* After this sort the wife row is expected directly below the husband row
* of the same serial, so values are copied from the adjacent row.
sort serial relate
foreach v in bpl race age {
	generate husb`v' = `v' if husb==1
	replace husb`v' = husb`v'[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1

	generate wife`v' = `v' if wife==1
	replace wife`v' = wife`v'[_n+1] if serial==serial[_n+1] & husb==1 & wife[_n+1]==1
}

generate spouseage = husbage if wife==1
replace spouseage = wifeage if husb==1
generate agegap = husbage - wifeage

generate spousebpl = husbbpl if wife==1
replace spousebpl = wifebpl if husb==1

generate spouserace = husbrace if wife==1
replace spouserace = wiferace if husb==1

generate husboccscore = occscore if husb==1
replace husboccscore = husboccscore[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1
generate husboccscoreqtl = qtl_ownoccscore if husb==1
replace husboccscoreqtl = husboccscoreqtl[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1

* Same region coding applied to the spouse bpl code.
generate spouseregion_birth = .
replace spouseregion_birth = 11 if inlist(spousebpl, 9, 23, 25, 33, 44, 50)
replace spouseregion_birth = 12 if inlist(spousebpl, 36, 34, 42)
replace spouseregion_birth = 21 if inlist(spousebpl, 17, 18, 26, 39, 55)
replace spouseregion_birth = 22 if inlist(spousebpl, 19, 20, 27, 29, 31, 38, 46)
replace spouseregion_birth = 31 if inlist(spousebpl, 10, 11, 24, 12, 13, 37, 45, 51, 54)
replace spouseregion_birth = 32 if inlist(spousebpl, 1, 21, 28, 47)
replace spouseregion_birth = 33 if inlist(spousebpl, 5, 22, 40, 48)
replace spouseregion_birth = 41 if inlist(spousebpl, 4, 8, 16, 32, 30, 35, 56, 49)
replace spouseregion_birth = 42 if inlist(spousebpl, 6, 41, 53)

generate spouseregion_birth_coarse = floor(spouseregion_birth/10)
replace spouseregion_birth_coarse = 5 if spousebpl>=100 & spousebpl!=.

drop husb wife

keep if age>=30 & age<=45

generate married = marst==1 | marst==2
generate ever_married = marst!=6

**** Assign husband's and wife's father's occwealth ***
* Only the spouse name codes are built here; no lookup merge follows.

generate husb = sex==1 & relate==1 & sploc==2
generate wife = sex==2 & relate==2 & sploc==1

tabulate husb if sex==1
tabulate wife if sex==2

sort serial relate
generate husbfirstsoundex = firstsoundex if husb==1
replace husbfirstsoundex = husbfirstsoundex[_n-1] if serial==serial[_n-1] & wife==1 & husb[_n-1]==1

generate wifefirstsoundex = firstsoundex if wife==1
replace wifefirstsoundex = wifefirstsoundex[_n+1] if serial==serial[_n+1] & husb==1 & wife[_n+1]==1

* Stack the 1920 rows on top of the panel built so far.
append using $tempdir/panel_trends_30yr_income_sdx.dta
save $tempdir/panel_trends_30yr_income_sdx.dta, replace
	
